# Start from an empty global environment so objects left over from an earlier
# session cannot leak into this run.
# NOTE(review): this also deletes anything the caller had in the workspace.
rm(list = ls())

# Packages attached by this script.
packs <- c("tidyverse", "ggplot2", "stargazer", "sp", "maptools", "rgeos", "rgdal")
# install.packages(packs, dependencies = TRUE) ## uncomment if you need to install any of the necessary packages
# library() stops with an error if a package is missing; invisible() keeps the
# list returned by lapply() from being auto-printed.
invisible(lapply(packs, library, character.only = TRUE))

# Every data/ path below is relative to the project root. Switch to the
# author's directory only when it exists on this machine; otherwise keep the
# current working directory so the script can be launched from the project
# root on any other machine instead of aborting here.
project_dir <- "C:/Users/mar20012/Dropbox/AMR_1948/Depopulation_Social_Cohesion/IS_Supplementary_Materials"
if (dir.exists(project_dir)) {
  setwd(project_dir)
} else {
  message(
    "Project directory not found; using current working directory: ",
    getwd()
  )
}
#setwd("~/Dropbox/AMR_1948/Depopulation_Social_Cohesion")


village_master <- read_csv("data/village_masterfile.csv") %>%
  as_tibble() %>%
  mutate(
    ## Jewish share of the 1945 population; NA when either count is missing
    jewishpop_pct = ifelse(
      is.na(jew_pop_45) | is.na(total_pop_45),
      NA,
      jew_pop_45 / total_pop_45
    ),
    ## 1 when the Jewish share is at least one half; an unknown share is coded 0
    jewish_maj = as.numeric(!is.na(jewishpop_pct) & jewishpop_pct >= .5),
    ###################################################
    ##### Village evacuation outcome, derived from the exodus_cause code:
    #####   0 -> no evacuation, 1-2 -> reactive, 3-6 -> preemptive;
    #####   any other or missing code is left as NA
    ###################################################
    depop_outcome = case_when(
      exodus_cause == 0 ~ "no evacuation",
      exodus_cause %in% c(1, 2) ~ "reactive evacuation",
      exodus_cause %in% c(3, 4, 5, 6) ~ "preemptive evacuation",
      TRUE ~ NA_character_
    ),
    ###################################################
    ##### Flags for the uncontested West Bank and Southern Gaza areas
    ###################################################
    ## West Bank = four whole districts plus six named villages of the
    ## Jerusalem district. A missing district (or, within Jerusalem, a missing
    ## village name) gives NA rather than 0, which then propagates to `included`.
    WestBank = case_when(
      is.na(district) ~ NA_real_,
      district %in% c("Hebron", "Ramallah", "Nablus", "Jinin") ~ 1,
      district != "Jerusalem" ~ 0,
      is.na(village) ~ NA_real_,
      village %in% c(
        "Jericho",
        "Auja, Arab al-Nusyrat, Arab alKa'abina, Arab al-'Urenat, Arab al-Sa'ayda",
        "Mukhmas",
        "Duyuk",
        "Nu'eima",
        "Nabi Musa & Palestine Potash Concession"
      ) ~ 1,
      TRUE ~ 0
    ),
    ## Southern Gaza = listed village ids inside the Gaza district. The inner
    ## ifelse() keeps a missing id as NA (as a chain of `==` tests would)
    ## instead of letting %in% turn it into FALSE.
    SouthernGaza = as.numeric(
      district == "Gaza" &
        ifelse(
          is.na(id),
          NA,
          id %in% c(541, 542, 829, 958, 526, 1203, 1326, 1354, 1453, 1466, 1500)
        )
    ),
    ## A village in either area is "uncontested"; NA flags can yield NA here
    included = if_else(
      WestBank == 1 | SouthernGaza == 1,
      "uncontested",
      "contested"
    ),
    ## there are some village duplicates, but the duplicate IDs are all true duplicates (same village, not two different villages with the same id)
    ## dup_id is 1 for the second and later occurrences of an id, 0 for the first
    dup_id = as.numeric(duplicated(id))
  ) %>%
  ## keep first occurrences only, and drop every row whose refugee_camp,
  ## bedouin, pop0 or jewish_only flag is not 0 (rows with a missing flag go too)
  filter(
    dup_id == 0 &
      refugee_camp == 0 &
      bedouin == 0 &
      pop0 == 0 &
      jewish_only == 0
  )


## Store the evacuation outcome as a factor and replace Haifa's long label with
## the plain name; all other village names (including missing ones) are kept.
village_master <- village_master %>%
  mutate(
    depop_outcome = as.factor(depop_outcome),
    village = replace(
      village,
      which(village == "Haifa  (includes Ahuzat Sir Herbert Samuel)"),
      "Haifa"
    )
  )

## Save the cleaned village table before any contested/uncontested subsetting
write_csv(village_master, "data/village_sample_initial.csv")


############################################################################
############################################################################
## Exclude West Bank and Southern Gaza to create the sample of villages contested in the 1948 War
############################################################################
############################################################################


## Keep only rows labelled "contested"; rows whose `included` is "uncontested"
## or NA are dropped.
village_sample_contested <- filter(village_master, included %in% "contested")

## Save the contested-village sample
write_csv(village_sample_contested, "data/village_sample_contested.csv")



############################################################################
############################################################################
############################################################################
############################################################################

## Auxiliary tables that are merged onto the contested-village sample below.
village_depop <- read_csv("data/village_depop_data.csv")

pvf <- read_csv("data/PVF.csv")

village_dist <- read_csv("data/village_neighborhood_depop.csv")

villagestats <- read_csv("data/village_statistics.csv")

## Build the analysis sample: contested villages with v_survey_collected == 1,
## enriched with the four auxiliary tables.
## NOTE(review): none of the joins passes `by`, so dplyr joins on every column
## name the two tables share; presumably the key is the village id -- confirm,
## and make it explicit so an unexpected shared column cannot change the match.
## NOTE(review): the final `filter(...)` looks like an elided placeholder; a
## literal `...` is not valid at top level, so the intended condition must be
## restored before this pipeline can run.
village_sample_analysis <- village_sample_contested %>%
  filter(v_survey_collected == 1) %>%
  left_join(village_depop) %>%
  left_join(pvf) %>%
  left_join(village_dist) %>%
  left_join(villagestats) %>%
  filter(...)



############################################################################
############################################################################
## Filter to contested villages that have a Village File (with information on social cohesion indicators) to subset to the sample for main analysis
## (the filtering itself happens where village_sample_analysis is built; this
## section only writes the result to disk)
############################################################################
############################################################################



## Save the main-analysis sample
write_csv(village_sample_analysis, "data/village_sample_analysis.csv")
